import pandas as pd
from news_labeler import NewsLabeler
from numpy import nan
import urllib.parse

class Tagger(object):
    """Fill the topic columns of a spreadsheet with ``NewsLabeler`` output.

    The working table is kept in ``self.ner``. It is read from
    ``./output/fix.xlsx`` on construction and written back out as CSV and
    Excel by :meth:`tagger`.
    """

    def __init__(self) -> None:
        """Create the tagger and immediately load the input workbook."""
        self.ner = pd.DataFrame()
        self.load()

    def load(self) -> None:
        """Read ``./output/fix.xlsx`` into ``self.ner``.

        The ``"Unnamed: 0"`` column is dropped when present; a workbook
        without that column is accepted as-is instead of raising
        ``KeyError``.
        """
        print('loading')
        self.ner = pd.read_excel("./output/fix.xlsx")
        self.ner = self.ner.drop(columns="Unnamed: 0", errors="ignore")
        print('finished')

    @staticmethod
    def _select_input(tag, sentence, abstract):
        """Decide which text of a row is labelled and how the row is re-tagged.

        Args:
            tag: Value of the row's ``tag`` column.
            sentence: Value of the row's ``Sentence`` column.
            abstract: Value of the row's ``abstract`` column; may be missing.

        Returns:
            ``None`` when the row must be skipped (``tag == 'dbpedia'``),
            otherwise a ``(text, new_tag)`` tuple where ``text`` is passed to
            the labeler and ``new_tag`` is the replacement for the ``tag``
            column, or ``None`` when the tag is left unchanged.
        """
        if tag == 'dbpedia':
            return None
        if tag == 'tagger_to_finish':
            # pd.isna recognises every missing-value representation (any
            # float NaN object, None, NaT); an identity check against the
            # numpy.nan singleton misses NaNs that are distinct objects.
            if pd.isna(abstract):
                return sentence, 'sentence'
            return abstract, 'wiki'
        return sentence, None

    def tagger(self) -> None:
        """Label every non-``dbpedia`` row and save the table.

        For each row the text chosen by :meth:`_select_input` is passed to
        ``NewsLabeler(text).labeler()``. Each returned item's ``'label'`` and
        ``'score'`` are written, interleaved, into the columns from
        ``'Topic 1'`` through ``'Topic 20 Score'`` (label slice, inclusive).
        The result is written to ``./output/abstract_tagger.csv`` and
        ``./output/abstract_tagger.xlsx``.
        """
        for i, dp in self.ner.iterrows():
            choice = self._select_input(dp["tag"], dp["Sentence"], dp["abstract"])
            if choice is None:
                print("skip dbpedia")
                continue

            text, new_tag = choice
            if new_tag == 'wiki':
                print("wiki tagger")
            else:
                print("sentence tagger")
            if new_tag is not None:
                self.ner.loc[i, 'tag'] = new_tag

            labels = NewsLabeler(text).labeler()

            # Flatten to [label, score, label, score, ...].
            # NOTE(review): presumably the labeler yields exactly as many
            # items as there are topic column pairs in the slice - confirm.
            df_row = [
                value
                for label in labels
                for value in (label['label'], label['score'])
            ]
            self.ner.loc[i, 'Topic 1':'Topic 20 Score'] = df_row

        self.ner.to_csv('./output/abstract_tagger.csv')
        self.ner.to_excel('./output/abstract_tagger.xlsx')
        print("done")



